1 Libraries and what not

## Base RF model
set.seed(42)

library(ggplot2)  # for autoplot() generic
library(dplyr)
library(ggpubr)
library(ggcorrplot)
library(sf)
library(stars)
library(tmap)
library(car)

2 Data processing

dat_sf <- st_read("./data/glacier_clim.shp")
## Reading layer `glacier_clim' from data source `/Users/u0784726/Dropbox/Data/devtools/glacier_mb/data/glacier_clim.shp' using driver `ESRI Shapefile'
## Simple feature collection with 95086 features and 30 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 67.47845 ymin: 27.4918 xmax: 103.8915 ymax: 45.35089
## Geodetic CRS:  GCS_unknown
my_vars <- c("mb_mwea", "area_km2", "z_med", "hi", "z_aspct", "z_slope", "tau",
             "t2_w_18", "t2_w_d", "t2_w_b", 
             "ppt_a_18", "ppt_a_d", "ppt_s_18", "ppt_s_d")
my_labels <- c("Mass balance", "Area (km2)", "Median elevation", "Hypsometry",
               "Aspect", "Slope", "tau", 
               "2m temperature (AMJJAS; 2018)",
               "2m temperature change (AMJJAS; 2018 - 2008)",
               "2m temperature trend (AMJJAS; 1991 - 2020)",
               "Annual precipitation (2018)",
               "Annual precipitation change (2018 - 2008)",
               "Precipitation seasonality (2018)",
               "Precipitation seasonality change (2018 - 2008)"
)
n_vars <- length(my_vars)

3 Correlation matrix

dat <- dat_sf %>%
  st_drop_geometry() %>% 
  dplyr::select(any_of(my_vars))
dat_cor <- cor(dat)
ggcorrplot(dat_cor)

Variance inflation factors (\(>5\) is a conservative threshold for multicollinearity).

fit <- lm(mb_mwea ~ ., dat)
vif(fit)
## area_km2    z_med       hi  z_aspct  z_slope      tau  t2_w_18   t2_w_d 
## 1.344225 3.553993 1.106417 1.010025 1.343442 1.947931 2.004656 2.090469 
##   t2_w_b ppt_a_18  ppt_a_d ppt_s_18  ppt_s_d 
## 4.158392 1.371184 1.716614 2.206770 1.426864

4 Histograms

for(i in 1:n_vars) {
  p1 <- gghistogram(dat, my_vars[i], main = my_labels[i])
  print(p1)
}

Possible variables for log-transform:

  • area_km2
  • z_slope
  • tau
  • tp_18

4.1 Log-transform

dat$larea_km2 <- log(dat$area_km2)
p1 <- gghistogram(dat, "larea_km2")
print(p1)

dat$lz_slope <- log(dat$z_slope)
p1 <- gghistogram(dat, "lz_slope")
print(p1)

dat$ltau <- log(dat$tau)
p1 <- gghistogram(dat, "ltau")
print(p1)

dat$lppt_a_18 <- log(dat$ppt_a_18)
p1 <- gghistogram(dat, "lppt_a_18")
print(p1)

5 Maps

Load basemap

basemap <- read_stars("~/Dropbox/Data/summer/natural_earth/HYP_50M_SR_W/HYP_50M_SR_W.tif")

basemap2 <- st_crop(basemap, st_bbox(dat_sf))
basemap2 <- st_rgb(basemap2)
for(i in 1:n_vars) {
m1 <- tm_shape(basemap2) +
  tm_raster(palette = "Greys", alpha = 0.5) +
  tm_shape(dat_sf) +
  tm_symbols(col = my_vars[i], 
             size = 0.25, 
             alpha = 0.75, 
             style = "quantile") +
  tm_layout(main.title = my_labels[i], 
            legend.position = c("left", "bottom"),
            legend.bg.color = "white",
            legend.bg.alpha = 0.5)

  print(m1)
}